First check that all necessary packages and modules (e.g., yfinance) are installed.
# !pip install yfinance
import numpy as np
import cvxpy as cp
import pandas as pd
from pandas_datareader import data as pdr
import yfinance as yf
yf.pdr_override()
from matplotlib import animation
from IPython.display import HTML
from matplotlib import rc
from sklearn.preprocessing import scale
import seaborn as sns
import matplotlib.pyplot as plt
plt.style.use('ggplot')
In this problem we aim to model the log-prices of a set of stocks $\mathbf{x}_t$ as a linear combination of factors $f_t$. As mentioned in the homework, we have a multi-factor model in which the S&P500 index and the Bitcoin price are used as factors. The coefficient $\boldsymbol{\beta}_i$ measures the degree of variability of the $i$-th stock with respect to the factors $f_t$. The larger $\boldsymbol{\beta}_i$, the higher the sensitivity of the $i$-th stock, meaning that small changes in $f_t$ will lead to large changes in $\mathbf{x}_{t,i}$, which may be undesirable in many scenarios. $\mathbf{\alpha}_i$ is the part of the log-price of the $i$-th stock $\mathbf{x}_{t,i}$ that is not explained by the factors $f_t$. Hence, the higher $\mathbf{\alpha}_i$, the higher the return of stock $i$ relative to the factors $f_t$. Finally, $\boldsymbol{\epsilon}_i$ denotes the risk associated with the $i$-th stock, i.e., the risk that cannot be explained by the factors, usually known as unsystematic risk.
First, we import the data using the pdr.get_data_yahoo function. We then merge the data corresponding to the stocks and the factors (using the join method), interpolate the merged data, and finally split it back into those two parts.
Finally, we compute the log returns via the .apply(np.log).apply(np.diff) method chain.
# Tickers of the stocks being modelled and of the two factors.
stocks = ['TSLA', 'AMZN', 'EBAY', 'AAPL', 'MSFT', 'META', 'GOOGL',
          'NFLX', 'IBM', 'NVDA']
indices = ['^GSPC', 'BTC-USD']

# Download adjusted closing prices for both groups over the same window.
prices = pdr.get_data_yahoo(stocks, start="2019-01-01", end="2022-12-31")[['Adj Close']]
factors = pdr.get_data_yahoo(indices, start="2019-01-01", end="2022-12-31")[['Adj Close']]

# Left-join the stocks onto the factors' date index, then fill the gaps inside
# each series by linear interpolation.
data_merged = factors.join(prices)
data_merged = data_merged.interpolate()

# interpolate() does not fill NaNs that precede a series' first observation.
# Replacing them with a placeholder price (e.g. 1) would create one enormous
# fake log-return per series on its first real trading day, and that single
# outlier dominates the least-squares fit. Drop those rows instead so every
# remaining date has a genuine (or interpolated) price for every ticker.
data_merged = data_merged.dropna()
if data_merged.empty:
    raise ValueError(
        "No date has a price for every ticker; check that all downloads succeeded."
    )

# Split the cleaned table back into stocks and factors.
prices = data_merged['Adj Close'][stocks]
factors = data_merged['Adj Close'][indices]

# Log-returns: first difference of the log-prices, column by column.
log_ret_prices = prices.apply(np.log).apply(np.diff)
log_ret_factors = factors.apply(np.log).apply(np.diff)
[*********************100%***********************] 10 of 10 completed [*********************100%***********************] 2 of 2 completed
# Show the first five rows of the cleaned stock prices as a sanity check.
first_rows = prices.head(5)
print(first_rows)
TSLA AMZN EBAY AAPL MSFT META \
Date
2019-01-01 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
2019-01-02 20.674667 76.956497 27.043747 38.047050 96.632675 135.679993
2019-01-03 20.024000 75.014000 26.546926 34.257282 93.077736 131.740005
2019-01-04 21.179333 78.769501 27.156227 35.719700 97.406708 137.949997
2019-01-05 21.563111 79.671501 27.312461 35.693200 97.448120 137.983332
GOOGL NFLX IBM NVDA
Date
2019-01-01 1.000000 1.000000 1.000000 1.000000
2019-01-02 52.734001 267.660004 89.489304 33.799732
2019-01-03 51.273499 271.200012 87.702805 31.757654
2019-01-04 53.903500 297.570007 91.128258 33.792286
2019-01-05 53.867667 303.493337 91.343155 34.388615
$\mathbf{X}\in \mathbb{R}^{N\times T}$ represents the log-returns of the stocks
$\mathbf{F} = [\mathbf{1}, \mathbf{f}^\top]^\top \in \mathbb{R}^{3\times T}$, where $\mathbf{f}\in \mathbb{R}^{2\times T}$ represents the log-returns of the factors.
# X holds one row per stock and one column per time step.
X = log_ret_prices.to_numpy().T
N, T = X.shape

# F stacks a row of ones (intercept term) on top of the factor log-returns.
intercept_row = np.ones((1, T))
factor_rows = log_ret_factors.to_numpy().T
F = np.concatenate([intercept_row, factor_rows], axis=0)

print((N, T))
(10, 1459)
# Numerical least-squares fit of X ~ Gamma F with CVXPY.
# Gamma has one row per stock and three columns: intercept plus two factor loadings.
Gamma = cp.Variable((N, 3))
obj = cp.sum_squares(X - Gamma @ F)
problem = cp.Problem(cp.Minimize(obj))
result = problem.solve(verbose=False)

# Replace the CVXPY variable by a labelled table of the fitted coefficients.
Gamma = pd.DataFrame(
    Gamma.value,
    index=stocks,
    columns=["alpha", "beta1", "beta2"],
)
print(Gamma)
alpha beta1 beta2 TSLA 0.000937 0.388590 0.163798 AMZN -0.000170 0.555907 0.061970 EBAY 0.000125 0.422265 0.035914 AAPL 0.000629 0.466464 0.066674 MSFT 0.000382 0.585149 0.061792 META -0.000340 0.628715 0.067122 GOOGL 0.000138 0.507886 0.059752 NFLX -0.000206 0.714680 0.060979 IBM 0.000124 0.574797 0.007824 NVDA 0.000717 0.452343 0.140429
Now using the closed form solution \begin{equation} \boldsymbol{\Gamma}^* = \underset{\boldsymbol{\Gamma}}{\text{argmin}}\, \|\mathbf{X} - \boldsymbol{\Gamma}\mathbf{F}\|_F^2 = \mathbf{X} \mathbf{F}^\top (\mathbf{F}\mathbf{F}^\top)^{-1} \end{equation}
# Closed-form least squares: solve (F F^T) Gamma^T = F X^T for Gamma^T
# instead of forming the inverse explicitly, then transpose back.
gram = F @ F.T
cross = F @ X.T
Gamma = np.linalg.solve(gram, cross).T

# Label rows by ticker and columns by coefficient.
Gamma = pd.DataFrame(
    Gamma,
    index=stocks,
    columns=["alpha", "beta1", "beta2"],
)
print('closed-form solution:')
print(Gamma)
closed-form solution:
alpha beta1 beta2
TSLA 0.000937 0.388590 0.163798
AMZN -0.000170 0.555907 0.061970
EBAY 0.000125 0.422265 0.035914
AAPL 0.000629 0.466464 0.066674
MSFT 0.000382 0.585149 0.061792
META -0.000340 0.628715 0.067122
GOOGL 0.000138 0.507886 0.059752
NFLX -0.000206 0.714680 0.060979
IBM 0.000124 0.574797 0.007824
NVDA 0.000717 0.452343 0.140429
# Frobenius norm of the residual between the data and the fitted model.
residual = X - Gamma @ F
error = np.linalg.norm(residual, ord='fro')
print("Error closed form: ", error)
Error closed form: 2.0854391368107734
For this part we use the fig.add_subplot(projection='3d') method to draw a 3D scatter plot.
# Restore Matplotlib's default rcParams before drawing.
plt.rcParams.update(plt.rcParamsDefault)
fig = plt.figure()
ax = fig.add_subplot(projection='3d')

# Work on the raw coefficient array; column 0 is plotted on the z-axis,
# columns 1 and 2 on the x- and y-axes.
Gamma = Gamma.to_numpy()
for ticker, (alpha_i, beta1_i, beta2_i) in zip(stocks, Gamma):
    # One marker per stock, labelled with its ticker.
    ax.scatter(beta1_i, beta2_i, alpha_i, color='b')
    ax.text(beta1_i, beta2_i, alpha_i, str(ticker), size=10, zorder=1, color='k')

ax.set_xlabel('beta_1')
ax.set_ylabel('beta_2')
ax.set_zlabel('alpha')
plt.show()
We then use the animation module to animate the figure.
# Render animations as interactive JavaScript/HTML in the notebook.
plt.rcParams['animation.html'] = 'jshtml'


def animate(frame):
    """Rotate the 3D axes for the given frame and return the figure.

    The elevation is fixed at 30 degrees; the azimuth advances by a quarter
    of a degree per frame.
    """
    azimuth = frame / 4
    ax.view_init(elev=30, azim=azimuth)
    plt.pause(.001)
    return fig


anim = animation.FuncAnimation(fig, animate, frames=200, interval=2)
# Optional export to a GIF file:
# anim.save('animationBrownianMotion2d.gif', writer='pillow', fps=60)
anim